
do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"


********Gen teacher by year VA measures 
****Merge with master build file and test accomodations file
global bootreps = 100
local bootreps = $bootreps



	*Get math student-teacher match
	use "$basedata/course_mem_clean.dta", clear
	keep birthdt schlcode year lea statecourse section meetingcode grade mastid teachid numstudents
	ren (*) *_ma
	 
	ren mastid_ma mastid
	ren year_ma year
	 
	 
	sort mastid year
	
	 
	gen birthdt = birthdt_ma
	drop birthdt_*
	 
	gen grade = grade_ma
	drop grade_*
	 
	 
	merge 1:1 mastid year using "$basedata/mb_clean.dta"
	ren _merge mrg_mb

	* keep only the standard tests
	replace score_ma = . if !inlist(ma_test_id,"RG","MA03","MA04","MA05","MA06","MA07","MA08")
	
	merge 1:1 mastid year using "$basedata/test_accom.dta"
	ren _merge mrg_accom
	xtset mastid year
	******Generate age variable
	gen age =  (mdy(6,30,year)-birthdt)/365.25
	replace age=0 if age==.
	 
	egen teach_year_ma=group(teachid_ma year)
	egen teach_schl_ma=group(teachid_ma schlcode_ma lea_ma)
	gen l_grade=l.grade
	* GEN lagged scores, lagged squares, and lagged cubes
	* CREATE INTERACTIONS OF PRIOR TEST SCORES WITH GRADE LEVEL
	foreach i in ma {
		cap gen l_`i'=l.score_`i'
		cap gen l_`i'_sq= l_`i'^2
		cap gen l_`i'_cb= l_`i'^3
		cap gen l_`i'_miss=(l_`i'==.)
		cap xi i.l_grade|l_`i' i.l_grade|l_`i'_sq i.l_grade|l_`i'_cb  , prefix(IL`i')
	}
	**Focus on grade 4-8
	drop if grade<4
	drop if grade>8
	****Generate teacher-by-school-by-studenttype-VA, teacher-by-year-VA, teacher-by-school VA
	gen lag_ma=l_ma


	egen teach_DISADV_yr_ma=group(teachid_ma DISADV year)
	egen teach_DISADV_schl_ma=group(teachid_ma DISADV schlcode_ma lea_ma)

	egen section_size_ma=sum(1), by(year schlcode_ma lea_ma  section_ma teachid_ma)
	****class domographics
	gen white=(ethnic==5)
	gen black=(ethnic==4)
	gen hispanic=(ethnic==3)

	foreach ss in ma {
		foreach i in white black hispanic female disability DISADV ENG_LEARN {
			egen class_mn_`i'_`ss'=mean(`i'), by(year schlcode_`ss' lea_`ss'  section_`ss' teachid_`ss')
				egen schl_mn_`i'_`ss'=mean(`i'), by(year schlcode_`ss' lea_`ss')
		}
	}

	****Generate indicators for data missingness
	foreach i in grade ethnic female gifted disability MIGRANT ENG_LEARN DISADV DISADV_persist year read_accom math_accom numstudents_ma {
		cap gen `i'_miss=(`i'==.)
		replace `i'=0 if `i'==.
	}
	replace numstudents_ma=section_size_ma if numstudents_ma==0
	 
	egen sidx_ma = group(schlcode_ma lea_ma)	 
	 
	local student_cov2 i.grade i.ethnic i.female i.gifted i.disability i.MIGRANT i.ENG_LEARN  i.DISADV_persist  math_accom  *_miss i.year#i.grade age

	cap egen schid_ma= group(schlcode_ma lea_ma)

	save "$temp/boot_build", replace



	
	
	forv b=1/`bootreps' {
		
	di "Starting bootstrap replication `b'"
	
	qui {

	foreach ss in ma {
		use "$temp/boot_build", clear
		
		bsample, cluster(teachid_`ss') idcluster(bteachid_`ss')
		
		cap gen ncerdc_id=teachid_`ss'
		
		cap drop teachid_`ss' teach_year_`ss' teach_schl_`ss' teach_DISADV_yr_`ss' teach_DISADV_schl_`ss'
		
		ren bteachid_`ss' teachid_`ss'
		
		egen teach_year_ma=group(teachid_ma year)
		egen teach_schl_ma=group(teachid_ma schlcode_ma lea_ma)
	
		egen teach_DISADV_yr_ma=group(teachid_ma DISADV year)
		egen teach_DISADV_schl_ma=group(teachid_ma DISADV schlcode_ma lea_ma)
		
		
		local student_cov2 i.grade i.ethnic i.female i.gifted i.disability i.MIGRANT i.ENG_LEARN  i.DISADV_persist read_accom math_accom  *_miss i.year#i.grade age
		
		***Focus sample 
		qui areg score_`ss' IL* `student_cov2' numstudents_`ss' , absorb(teach_year_`ss')
		predict VA_1yr, d
		cap gen samp=e(sample)
		drop if samp!=1
		cap egen stu_count =sum(samp), by (teachid_`ss' year)
		capture egen tag_tch_sch_yr=tag(teachid_`ss' year schlcode_`ss' lea_`ss')
		cap egen tchr_schls=sum(tag_tch_sch_yr), by(teachid_`ss' year)

		****Drop teachers who teach SPED and ELL

		drop if class_mn_disability_`ss'>.5
		drop if class_mn_ENG_LEARN_`ss'>.5


		 
		********************************************************************
		****Generate teacher school EB VA measures

		capture egen tid=group(teachid_`ss' DISADV)
		cap egen tidy=group(year schlcode_`ss' lea_`ss' section_`ss' DISADV teachid_`ss' )
		cap egen sch_by_yr=group(schlcode_`ss' lea_`ss' year)

		cap gen sy=year
		merge m:1 ncerdc_id sy using "$basedata/ncerdc_experience.dta" 
		drop if _merge==2
		drop _merge


		local student_cov2 i.grade i.ethnic i.female i.gifted i.disability i.MIGRANT i.ENG_LEARN  i.DISADV_persist read_accom math_accom  *_miss i.year#i.grade age

		gen vijt=.
		gen sample=.
		foreach i in 0 1 {
			areg score_`ss' IL* `student_cov2' numstudents_`ss' if DISADV==`i', absorb(tidy)
			replace sample=1 if e(sample)==1
			predict temp if DISADV==`i' & e(sample)==1, d 
			predict temp1 if DISADV==`i' & e(sample)==1, r
			replace vijt=temp+temp1  if DISADV==`i' & e(sample)==1
			drop temp1 temp
		}

		save "$temp/student_level_`ss'_w_unshrunken_VA_boot", replace	
	}


	foreach ss in ma { 

		use "$temp/student_level_`ss'_w_unshrunken_VA_boot", clear

		cap drop lea schlcode

		gen expdum = tchr_exp_pay_level
		replace expdum = 6 if tchr_exp_pay_level>6 & tchr_exp_pay_level!=.
		replace expdum = 7 if tchr_exp_pay_level==.
		assert expdum<=7 & floor(expdum)==expdum

		ren schid_`ss' schid
		ren lea_`ss' lea
		ren schlcode_`ss' schlcode
		ren teachid_`ss' teachid
		ren section_`ss' section
		ren sidx_`ss' sidx

		preserve
			gen numstudentstidy = 1
			collapse (mean) exp* tchr_exp_pay_level vijt DISADV (sum) numstudentstidy, by(tid sch_by_yr schid tidy year)


			reghdfe vijt i.expdum [fw=numstudentstidy], absorb(tch_e0_sfe=tid sfe=schid year) res(resid_tch_sfe)


			***predict only the fitted experience profile
			predict xb,xb
			cap gen VA_tchr_exp_sfe= xb+tch_e0_sfe

			collapse (mean) xb VA_tchr_exp_sfe sfe, by(tid sch_by_yr schid tidy)
			tempfile tempVA
			save `tempVA', replace
		restore

		sort tid sch_by_yr schid tidy
		merge n:1 tid sch_by_yr schid tidy using `tempVA'
		assert _m==3
		drop _m

		* construct class variable
		egen jtidx = group(teachid year schlcode lea)
		sort jtidx section
		gen newsection = jtidx!=jtidx[_n-1] | section!=section[_n-1]
		gen secnum = 1 if jtidx!=jtidx[_n-1]
		replace secnum = secnum[_n-1]+newsection if secnum==.

		gen j = teachid // teacher index
		gen c = secnum // class index (numbered 1 to C, reset for each teacher-year)
		gen m = 1*(DISADV==0) + 2*(DISADV==1) // student types (numered 1 to M)
		gen t = year // year
		gen r = vijt - xb - sfe // student-level residual: original residual minus experience effects minus school-year FEs
		gen alphaZ = xb // estimated experience effect (from prior step), varies by teacher-year
		gen e = tchr_exp_pay_level
		gen s = sidx

		keep j c m t r alphaZ e s schlcode lea vijt sfe

		qui summ t
		global tmin = r(min)
		global tmax = r(max)

		global nt = $tmax - $tmin

		** estimate the variance of the student error, by type

		* take the student residual, minus the classroom mean, by type
		* square the residual and take the mean across the data (mean residual is 0, so square = variance)

		bys j c m t: egen mean_resid_ct = mean(r)
		bys j c m t: egen n_resid_ct = count(r)
		gen resid_i_sq = (r-mean_resid_ct)^2
		gen resid_i_sq_c = resid_i_sq * n_resid_ct/(n_resid_ct-1)

		*Variance of student residuals
		qui summ resid_i_sq_c if m==1
		gen sigma2_eps1 = r(mean)

		qui summ resid_i_sq_c if m==2
		gen sigma2_eps2 = r(mean)

		** collapse the student residuals to the teacher-classroom-type level 
		* calculate the mean and the number of observations

		collapse (count) n_ct=r (mean) Abar=r alphaZ sigma* e vijt sfe, by(j c m t s schlcode lea)

		** estimate the covariances and variances necessary for the structural parameters

		* cov(\bar{A}_{jmct},\bar{A}_{jmc't}) for m=1,2
		* will just keep classrooms 1 and 2. can add more for precision

		preserve

			gen Abar_c1 = Abar if c==1
			gen Abar_c2 = Abar if c==2

			collapse (mean) Abar_c1 Abar_c2, by(j m t s)

			keep if Abar_c1!=. & Abar_c2!=.

			corr Abar_c1 Abar_c2 if m==1, covariance
			local sigma2_muj1 = r(cov_12)

			corr Abar_c1 Abar_c2 if m==2, covariance
			local sigma2_muj2 = r(cov_12)

		restore

		gen sigma2_muj1 = `sigma2_muj1'
		gen sigma2_muj2 = `sigma2_muj2'

		* cov(\bar{A}_{j1ct},\bar{A}_{j2c't})

		preserve

			gen Abar_c1_m1 = Abar if c==1 & m==1
			gen Abar_c1_m2 = Abar if c==1 & m==2
			gen Abar_c2_m1 = Abar if c==2 & m==1
			gen Abar_c2_m2 = Abar if c==2 & m==2

			collapse (mean) Abar_c1_m* Abar_c2_m*, by(j t s)

			* we want to make both types of comparisons (c=1,m=1) vs. (c=2,m=2) AND (c=1,m=2) vs. (c=2,m=1)
			* expand data
			expand 2, gen(obsnum)

			gen Abar_c_m_1 = Abar_c1_m1 if obsnum==1
			replace Abar_c_m_1 = Abar_c2_m1 if obsnum==0

			gen Abar_c_m_2 = Abar_c2_m2 if obsnum==1
			replace Abar_c_m_2 = Abar_c1_m2 if obsnum==0

			corr Abar_c_m_1 Abar_c_m_2, covariance
			local sigma_muj1muj2 = r(cov_12)

		restore

		gen sigma_muj1muj2 = `sigma_muj1muj2'

		* cov(\bar{A}_{j1ct},\bar{A}_{j2ct})

		preserve

			gen Abar_m1 = Abar if m==1
			gen Abar_m2 = Abar if m==2

			collapse (mean) Abar_m1 Abar_m2, by(j c t s)

			corr Abar_m1 Abar_m2, covariance
			local cov_m1m2 = r(cov_12)

		restore

		gen sigma_theta1theta2 = `cov_m1m2' - sigma_muj1muj2

		* Var(\bar{A}_{jmct})

		forv mm=1/2 {

			qui summ Abar if m==`mm'
			gen VarAbar_m`mm' = r(Var)

			gen temp`mm' = VarAbar_m`mm' - sigma2_muj`mm' - (sigma2_eps`mm' / n) if m==`mm'

			qui summ temp`mm' 
			gen sigma2_theta`mm' = r(mean)

		}

		drop temp*


		* estimate drift parameters, by comparing across years (sometimes within, sometimes across type)
		* estimate \sigma_{\mu_1,|t-s|}, \sigma_{\mu_2,|t-s|}, \sigma_{\mu_1 \mu_2,|t-s|}

		preserve

			gen Abar_m1_wgt = Abar*n_ct if m==1 // want to calculate mean residual, weighted by number of students
			gen Abar_m2_wgt = Abar*n_ct if m==2

			gen n_ct_m1 = n_ct if m==1
			gen n_ct_m2 = n_ct if m==2

			collapse (sum) n_ct_m1 n_ct_m2 Abar_m1_wgt Abar_m2_wgt, by(j t)

			gen Abar_m1 = Abar_m1_wgt / n_ct_m1
			gen Abar_m2 = Abar_m2_wgt / n_ct_m2

			egen teacher_index = group(j)
			gen teacher_odd = mod(teacher_index,2)==1

			xtset j t

			local nt = $nt

			forv ts=1/`nt' {
				gen Abar_m1_L`ts' = l`ts'.Abar_m1
				gen Abar_m2_L`ts' = l`ts'.Abar_m2
				
				corr Abar_m1 Abar_m1_L`ts', covariance
				local cov_m1m1_`ts' = r(cov_12)
				
				corr Abar_m2 Abar_m2_L`ts', covariance
				local cov_m2m2_`ts' = r(cov_12)
				
				gen x1 = Abar_m1*(teacher_odd==1) + Abar_m2*(teacher_odd==0)
				gen x2 = Abar_m2_L`ts'*(teacher_odd==1) + Abar_m1_L`ts'*(teacher_odd==0)
				corr x1 x2, covariance
				local cov_m1m2_`ts' = r(cov_12)
				
				drop Abar_m1_L`ts' Abar_m2_L`ts' x1 x2

			}
		restore

		local nt = $nt

		forv ts=1/`nt' {
			gen sigma_m1m1_`ts' = `cov_m1m1_`ts''
			gen sigma_m2m2_`ts' = `cov_m2m2_`ts''
			gen sigma_m1m2_`ts' = `cov_m1m2_`ts''
		}
		keep if _n==1
		keep sigma*
		gen b = `b'

		if `b'>1 {
			append using "$temp/production_structural_param_boot"
		}
		sort b
		
		save "$temp/production_structural_param_boot", replace


	}
	}
}